1. Setup

These procedures consist of importing the data set into the R environment, loading additional libraries, and organising the data for analysis.

1.1. Activating libraries

These are additional libraries used for specific data analyses.

library ("plotly")     # Used for creating plots
## Loading required package: ggplot2
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library ("ggplot2")    # Used for creating charts
library ("mongolite")  # Used for retrieving data from mongoDB 
library ("effsize")    # Used for calculating Vargha-Delaney A effect size
library ("lsr")        # Used for calculating Cohen's d effect size
library ("pwr")        # Used for calculating the statistical power of tests
library ("gmodels")    # Used for calculating the Fisher's exact test

1.2. Importing dataset

# Note that the data collected from students was stored as a collection in a MongoDB database to facilitate data analysis. The JSON file for creating the collection should be available along with this analysis.

# Connection settings shared by every collection used in this analysis.
mongo_url <- "mongodb://localhost:27018"
mongo_db  <- "cloud-platform-experiment-2017"

# Opens one collection of the experiment database.
#
# collection: name of the MongoDB collection to open.
# Returns the mongolite collection object created by mongo().
open_collection <- function(collection) {
  mongo(collection = collection, db = mongo_db, url = mongo_url)
}

dataset      <- open_collection("dataset")
demographics <- open_collection("demographics")
feedback     <- open_collection("feedback")

Mongo variables

print (dataset)
## <Mongo collection> 'dataset' 
##  $aggregate(pipeline = "{}", options = "{\"allowDiskUse\":true}", handler = NULL, pagesize = 1000) 
##  $count(query = "{}") 
##  $distinct(key, query = "{}") 
##  $drop() 
##  $export(con = stdout(), bson = FALSE) 
##  $find(query = "{}", fields = "{\"_id\":0}", sort = "{}", skip = 0, limit = 0, handler = NULL, pagesize = 1000) 
##  $import(con, bson = FALSE) 
##  $index(add = NULL, remove = NULL) 
##  $info() 
##  $insert(data, pagesize = 1000, ...) 
##  $iterate(query = "{}", fields = "{\"_id\":0}", sort = "{}", skip = 0, limit = 0) 
##  $mapreduce(map, reduce, query = "{}", sort = "{}", limit = 0, out = NULL, scope = NULL) 
##  $remove(query, just_one = FALSE) 
##  $rename(name, db = NULL) 
##  $update(query, update = "{\"$set\":{}}", upsert = FALSE, multiple = FALSE)

2. Demographics

2.1. Summary

demographics$find()
##    id age            gender cloudKnowledge createAWSvm createAZUREvm
## 1   8  22              Male              1          No            No
## 2   7  23              Male              1          No            No
## 3   5  19              Male              2          No            No
## 4   6  23 Prefer not to say              2          No            No
## 5   4  22              Male              3          No           Yes
## 6   2  22              Male              2          No           Yes
## 7  11  20              Male              1         Yes           Yes
## 8   1  21              Male              1         Yes           Yes
## 9  10  20              Male              2          No            No
## 10  3  20 Prefer not to say              1         Yes            No
## 11  9  19              Male              1         Yes           Yes

2.2. How old are you?

age = demographics$find(fields = "{\"age\": 1, \"_id\": 0}")

table (age)
## age
## 19 20 21 22 23 
##  2  3  1  3  2
# Pie chart of the frequency table of `age`; slices keep the table order and
# are labelled with the value and its percentage.
# NOTE(review): `colors` is not assigned anywhere in the visible code; unless
# it is defined elsewhere it resolves to the base function grDevices::colors
# instead of a palette -- confirm.
plot_ly(
  data = data.frame(table(age)),
  type = "pie",
  labels = ~age,
  values = ~Freq,
  sort = FALSE,
  direction = "clockwise",
  textinfo = "label+percent",
  textposition = "inside",
  textfont = list(size = 14), # 28
  insidetextfont = list(color = "#FFFFFF"),
  marker = list(
    colors = colors,
    line = list(color = "#FFFFFF", width = 1)
  )
)

2.3. What is your gender?

gender = demographics$find(fields = "{\"gender\": 1, \"_id\": 0}")

table (gender)
## gender
##              Male Prefer not to say 
##                 9                 2
# Pie chart of the frequency table of `gender`; slices keep the table order
# and are labelled with the value and its percentage.
# NOTE(review): `colors` is not assigned anywhere in the visible code; unless
# it is defined elsewhere it resolves to the base function grDevices::colors
# instead of a palette -- confirm.
plot_ly(
  data = data.frame(table(gender)),
  type = "pie",
  labels = ~gender,
  values = ~Freq,
  sort = FALSE,
  direction = "clockwise",
  textinfo = "label+percent",
  textposition = "inside",
  textfont = list(size = 14), # 28
  insidetextfont = list(color = "#FFFFFF"),
  marker = list(
    colors = colors,
    line = list(color = "#FFFFFF", width = 1)
  )
)

2.4. How would you evaluate your current knowledge on cloud computing?

cloudKnowledge = demographics$find(fields = "{\"cloudKnowledge\": 1, \"_id\": 0}")

table (cloudKnowledge)
## cloudKnowledge
## 1 2 3 
## 6 4 1
# Horizontal bar chart of the frequency table of `cloudKnowledge`:
# one bar per reported knowledge level, bar length = number of answers.
plot_ly(
  data = data.frame(table(cloudKnowledge)),
  type = "bar",
  orientation = "h",
  x = ~Freq,
  y = ~cloudKnowledge,
  marker = list(
    color = "rgba(176,196,222, 0.6)",
    line = list(color = "rgba(176,196,222, 0.6)", width = 1)
  )
)

2.5. Have you ever created AWS EC2 virtual machines for your own use or work?

createAWSvm = demographics$find(fields = "{\"createAWSvm\": 1, \"_id\": 0}")

table (createAWSvm)
## createAWSvm
##  No Yes 
##   7   4
# Pie chart of the frequency table of `createAWSvm`; slices keep the table
# order and are labelled with the value and its percentage.
# NOTE(review): `colors` is not assigned anywhere in the visible code; unless
# it is defined elsewhere it resolves to the base function grDevices::colors
# instead of a palette -- confirm.
plot_ly(
  data = data.frame(table(createAWSvm)),
  type = "pie",
  labels = ~createAWSvm,
  values = ~Freq,
  sort = FALSE,
  direction = "clockwise",
  textinfo = "label+percent",
  textposition = "inside",
  textfont = list(size = 14), # 28
  insidetextfont = list(color = "#FFFFFF"),
  marker = list(
    colors = colors,
    line = list(color = "#FFFFFF", width = 1)
  )
)

2.6. Have you ever created Azure virtual machines for your own use or work?

createAZUREvm = demographics$find(fields = "{\"createAZUREvm\": 1, \"_id\": 0}")

table (createAZUREvm)
## createAZUREvm
##  No Yes 
##   6   5
# Pie chart of the frequency table of `createAZUREvm`; slices keep the table
# order and are labelled with the value and its percentage.
# NOTE(review): `colors` is not assigned anywhere in the visible code; unless
# it is defined elsewhere it resolves to the base function grDevices::colors
# instead of a palette -- confirm.
plot_ly(
  data = data.frame(table(createAZUREvm)),
  type = "pie",
  labels = ~createAZUREvm,
  values = ~Freq,
  sort = FALSE,
  direction = "clockwise",
  textinfo = "label+percent",
  textposition = "inside",
  textfont = list(size = 14), # 28
  insidetextfont = list(color = "#FFFFFF"),
  marker = list(
    colors = colors,
    line = list(color = "#FFFFFF", width = 1)
  )
)

2.7. Analysis: Have you ever created AWS EC2 AND/OR Azure virtual machines for your own use or work?

createAWSandAZUREvm = demographics$find(fields = "{\"createAZUREvm\": 1, \"createAWSvm\": 1, \"_id\": 0}")

print (createAWSandAZUREvm)
##    createAWSvm createAZUREvm
## 1           No            No
## 2           No            No
## 3           No            No
## 4           No            No
## 5           No           Yes
## 6           No           Yes
## 7          Yes           Yes
## 8          Yes           Yes
## 9           No            No
## 10         Yes            No
## 11         Yes           Yes
table (createAWSandAZUREvm)
##            createAZUREvm
## createAWSvm No Yes
##         No   5   2
##         Yes  1   3

3. Efficiency

3.1. Dataset

print (paste("AWS data points", dataset$count(query = "{\"platform\": \"AWS\"}")))
## [1] "AWS data points 76"
print (paste("Azure data points", dataset$count(query = "{\"platform\": \"Azure\"}")))
## [1] "Azure data points 76"

3.2. Platform

Plot

# Box plot of the `seconds` measurements, one box per platform.
ggplot(
  data = dataset$find(fields = "{\"platform\": 1, \"seconds\": 1, \"_id\": 0}")
) +
  geom_boxplot(aes(x = platform, y = seconds, colour = platform)) +
  scale_x_discrete(labels = c("AWS", "Azure")) +
  scale_y_continuous(breaks = seq(from = 0, to = 1300, by = 150)) +
  xlab("Platform") +
  ylab("Efficiency") +
  ggtitle("Median Efficiency") +
  theme(
    plot.title = element_text(hjust = 0.5, margin = margin(15, 0, 15, 0)),
    axis.title.y = element_text(margin = margin(0, 20, 0, 0)),
    axis.title.x = element_text(margin = margin(20, 0, 0, 0)),
    # The legend is hidden. The earlier "bottom" position and the legend
    # box/key settings were overridden by a later theme() call and never
    # took effect, so they are no longer set.
    legend.position = "none"
  )

Descriptive Statistics

awsSeconds = dataset$find(query = "{\"platform\": \"AWS\"}", fields = "{\"seconds\": 1, \"_id\": 0}")$seconds
azureSeconds = dataset$find(query = "{\"platform\": \"Azure\"}", fields = "{\"seconds\": 1, \"_id\": 0}")$seconds

print ("AWS Median")
## [1] "AWS Median"
summary (awsSeconds)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    30.0   110.5   171.0   222.3   231.5  1104.0
print ("Azure Median")
## [1] "Azure Median"
summary (azureSeconds)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    84.0   167.5   229.0   318.3   375.2  1276.0

Inferential Statistics

# Paired Wilcoxon signed rank test on the `seconds` measured on AWS and Azure.
#
# wilcox.test(paired = TRUE) matches x[i] with y[i], so both vectors must
# describe the same participant/task combination at every position. Two
# separate, unsorted queries do not guarantee that. In addition, the answer
# counts reported in this analysis show that AWS has only 10 rows for task 4
# (6 for participant 10) while Azure has only 10 rows for task 3 (6 for
# participant 11): each platform lacks a different observation, so
# position-based pairs cannot all line up. The pairs are therefore built
# explicitly and incomplete pairs are dropped.
#
# NOTE: the results printed after this chunk in the rendered report were
# produced with the earlier position-based pairing; re-run to refresh them.
efficiency <- dataset$find(
  fields = "{\"participant\": 1, \"task\": 1, \"platform\": 1, \"seconds\": 1, \"_id\": 0}"
)

pairKey <- c("participant", "task")
pairedSeconds <- merge(
  efficiency[efficiency$platform == "AWS", c(pairKey, "seconds")],
  efficiency[efficiency$platform == "Azure", c(pairKey, "seconds")],
  by = pairKey,                      # inner join: keeps complete pairs only
  suffixes = c(".aws", ".azure")
)

# Every participant/task combination must contribute exactly one pair.
stopifnot(!anyDuplicated(pairedSeconds[pairKey]))

awsSeconds <- pairedSeconds$seconds.aws
azureSeconds <- pairedSeconds$seconds.azure

wilcox.test(
  x = awsSeconds,
  y = azureSeconds,
  mu = 0,
  paired = TRUE,
  conf.int = TRUE
)
## 
##  Wilcoxon signed rank test with continuity correction
## 
## data:  awsSeconds and azureSeconds
## V = 617, p-value = 1.2e-05
## alternative hypothesis: true location shift is not equal to 0
## 95 percent confidence interval:
##  -115.99996  -39.99994
## sample estimates:
## (pseudo)median 
##      -73.49998

Effect Size

# Vargha-Delaney A effect size between the AWS and Azure `seconds` values.
# The complement (1 - estimate) of VD.A(AWS, Azure) is what gets printed.
secondsOnly <- "{\"seconds\": 1, \"_id\": 0}"

awsSeconds <- dataset$find(
  query = "{\"platform\": \"AWS\"}",
  fields = secondsOnly
)$seconds
azureSeconds <- dataset$find(
  query = "{\"platform\": \"Azure\"}",
  fields = secondsOnly
)$seconds

vdaPlatform <- VD.A(awsSeconds, azureSeconds)
print(1 - vdaPlatform$estimate)
## [1] 0.6862881

Statistical Power

# Cohen's d for paired samples, used as the effect size of the power analysis.
#
# cohensD(method = "paired") works on the differences x[i] - y[i], so both
# vectors must describe the same participant/task combination at every
# position. Two separate, unsorted queries do not guarantee that, and the
# answer counts reported in this analysis show that each platform lacks a
# different observation (AWS: participant 10 / task 4; Azure: participant 11 /
# task 3). The pairs are therefore built explicitly and incomplete pairs are
# dropped.
#
# NOTE: the value printed after this chunk in the rendered report was
# produced with the earlier position-based pairing; re-run to refresh it.
efficiency <- dataset$find(
  fields = "{\"participant\": 1, \"task\": 1, \"platform\": 1, \"seconds\": 1, \"_id\": 0}"
)

pairKey <- c("participant", "task")
pairedSeconds <- merge(
  efficiency[efficiency$platform == "AWS", c(pairKey, "seconds")],
  efficiency[efficiency$platform == "Azure", c(pairKey, "seconds")],
  by = pairKey,                      # inner join: keeps complete pairs only
  suffixes = c(".aws", ".azure")
)

# Every participant/task combination must contribute exactly one pair.
stopifnot(!anyDuplicated(pairedSeconds[pairKey]))

awsSeconds <- pairedSeconds$seconds.aws
azureSeconds <- pairedSeconds$seconds.azure

# Identifying Cohen's d for statistical power calculation
ef.d <- cohensD(awsSeconds, azureSeconds, method = "paired")

print(ef.d)
## [1] 0.4382009
# Running power test
# Power of a paired t test for the observed effect size `ef.d`.
# n is the number of pairs; it is taken from the paired vector that produced
# `ef.d` instead of a separate database count, so both always agree.
pwr.t.test(
  n = length(awsSeconds),
  d = ef.d,
  sig.level = 0.05,
  type = "paired"
)
## 
##      Paired t test power calculation 
## 
##               n = 76
##               d = 0.4382009
##       sig.level = 0.05
##           power = 0.9649161
##     alternative = two.sided
## 
## NOTE: n is number of *pairs*

3.3. Task

Plot

# Box plots of the `seconds` measurements per platform, one panel per task.
# Each panel gets its own axis ranges (scales = "free").
ggplot(
  data = dataset$find(fields = "{\"task\": 1, \"platform\": 1, \"seconds\": 1, \"_id\": 0}")
) +
  geom_boxplot(aes(x = platform, y = seconds, colour = platform)) +
  facet_wrap(~ task, scales = "free") +
  scale_x_discrete(labels = c("AWS", "Azure")) +
  ylab("Efficiency") +
  ggtitle("Median Efficiency by Task") +
  theme(
    plot.title = element_text(hjust = 0.5, margin = margin(15, 0, 15, 0)),
    axis.title.y = element_text(margin = margin(0, 20, 0, 0)),
    axis.title.x = element_text(margin = margin(20, 0, 0, 0)),
    # The legend is hidden. The earlier "bottom" position and the legend
    # box/key settings were overridden by a later theme() call and never
    # took effect, so they are no longer set.
    legend.position = "none"
  )

Descriptive Statistics

# For every task (1 to 7): print the summary statistics of `seconds` on each
# platform, followed by the difference between the two medians
# (AWS minus Azure).
secondsOnly <- "{\"seconds\": 1, \"_id\": 0}"

for (taskId in seq_len(7)) {
  # Common prefix of both queries; only the platform name differs.
  taskFilter <- paste0("{\"task\":", taskId, ", \"platform\": \"")

  awsSummary <- summary(
    dataset$find(query = paste0(taskFilter, "AWS\"}"), fields = secondsOnly)$seconds
  )
  azureSummary <- summary(
    dataset$find(query = paste0(taskFilter, "Azure\"}"), fields = secondsOnly)$seconds
  )

  print(paste0("Task ", taskId, " AWS Summary = "))
  print(awsSummary)

  print(paste0("Task ", taskId, " Azure Summary = "))
  print(azureSummary)

  medianGap <- awsSummary[["Median"]] - azureSummary[["Median"]]
  print(paste0("Difference task ", taskId, " = ", medianGap))
}
## [1] "Task 1 AWS Summary = "
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    30.0    87.0   138.0   169.8   199.5   567.0 
## [1] "Task 1 Azure Summary = "
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    84.0   151.5   181.0   180.0   212.0   288.0 
## [1] "Difference task 1 = -43"
## [1] "Task 2 AWS Summary = "
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   109.0   171.0   183.0   253.2   270.0   787.0 
## [1] "Task 2 Azure Summary = "
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   107.0   191.5   244.0   346.5   346.5  1276.0 
## [1] "Difference task 2 = -61"
## [1] "Task 3 AWS Summary = "
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    42.0   106.5   142.0   172.1   176.0   433.0 
## [1] "Task 3 Azure Summary = "
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   111.0   148.8   197.0   291.9   326.8   881.0 
## [1] "Difference task 3 = -55"
## [1] "Task 4 AWS Summary = "
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    93.0   129.5   180.0   182.4   219.0   290.0 
## [1] "Task 4 Azure Summary = "
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   150.0   185.5   220.0   272.2   265.0   605.0 
## [1] "Difference task 4 = -40"
## [1] "Task 5 AWS Summary = "
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    64.0   101.0   180.0   220.5   249.0   595.0 
## [1] "Task 5 Azure Summary = "
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   150.0   233.5   276.0   373.1   469.0   827.0 
## [1] "Difference task 5 = -96"
## [1] "Task 6 AWS Summary = "
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    60.0    77.5   120.0   147.7   169.5   400.0 
## [1] "Task 6 Azure Summary = "
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   127.0   160.5   220.0   271.3   339.5   534.0 
## [1] "Difference task 6 = -100"
## [1] "Task 7 AWS Summary = "
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   120.0   175.5   236.0   406.6   434.0  1104.0 
## [1] "Task 7 Azure Summary = "
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   140.0   220.5   427.0   490.9   716.0  1037.0 
## [1] "Difference task 7 = -191"

Effect Size

# For every task (1 to 7): Vargha-Delaney A between the AWS and Azure
# `seconds` values, printed as the complement (1 - estimate).
secondsOnly <- "{\"seconds\": 1, \"_id\": 0}"

for (taskId in seq_len(7)) {
  # Common prefix of both queries; only the platform name differs.
  taskFilter <- paste0("{\"task\":", taskId, ", \"platform\": \"")

  awsRows   <- dataset$find(query = paste0(taskFilter, "AWS\"}"),   fields = secondsOnly)
  azureRows <- dataset$find(query = paste0(taskFilter, "Azure\"}"), fields = secondsOnly)

  vda <- VD.A(awsRows$seconds, azureRows$seconds)

  print(paste0("Task ", taskId, " effect size = ", 1 - vda$estimate))
}
## [1] "Task 1 effect size = 0.661157024793388"
## [1] "Task 2 effect size = 0.628099173553719"
## [1] "Task 3 effect size = 0.709090909090909"
## [1] "Task 4 effect size = 0.704545454545455"
## [1] "Task 5 effect size = 0.768595041322314"
## [1] "Task 6 effect size = 0.809917355371901"
## [1] "Task 7 effect size = 0.603305785123967"

3.4. Participant

Plot

# Box plots of the `seconds` measurements per platform, one panel per
# participant. Each panel gets its own axis ranges (scales = "free").
ggplot(
  data = dataset$find(fields = "{\"participant\": 1, \"platform\": 1, \"seconds\": 1, \"_id\": 0}")
) +
  geom_boxplot(aes(x = platform, y = seconds, colour = platform)) +
  facet_wrap(~ participant, scales = "free") +
  scale_x_discrete(labels = c("AWS", "Azure")) +
  ylab("Efficiency") +
  ggtitle("Median Efficiency by Participant") +
  theme(
    plot.title = element_text(hjust = 0.5, margin = margin(15, 0, 15, 0)),
    axis.title.y = element_text(margin = margin(0, 20, 0, 0)),
    axis.title.x = element_text(margin = margin(20, 0, 0, 0)),
    # The legend is hidden. The earlier "bottom" position and the legend
    # box/key settings were overridden by a later theme() call and never
    # took effect, so they are no longer set.
    legend.position = "none"
  )

Descriptive Statistics

# For every participant (1 to 11): print the summary statistics of `seconds`
# on each platform, followed by the difference between the two medians
# (AWS minus Azure).
for (participant.var in 1:11) {
  queryStringAWS   <- paste0 ("{\"participant\":", participant.var, ", \"platform\": \"AWS\"}")
  queryStringAzure <- paste0 ("{\"participant\":", participant.var, ", \"platform\": \"Azure\"}")
  
  efficiencyAWS   <- dataset$find(query = queryStringAWS,   fields = "{\"seconds\": 1, \"_id\": 0}")
  efficiencyAzure <- dataset$find(query = queryStringAzure, fields = "{\"seconds\": 1, \"_id\": 0}")
  
  print (paste0("Participant ", participant.var, " AWS Summary = "))
  summaryEfficiencyAWS <- summary (efficiencyAWS$seconds)
  print (summaryEfficiencyAWS)
  
  print (paste0("Participant ", participant.var, " Azure Summary = "))
  summaryEfficiencyAzure <- summary (efficiencyAzure$seconds)
  print (summaryEfficiencyAzure)
  
  # The label now has a space before the participant number, matching the
  # "Difference task N" label used for the per-task statistics.
  medianDifference <- summaryEfficiencyAWS[["Median"]] - summaryEfficiencyAzure[["Median"]]
  print (paste0("Difference participant ", participant.var, " = ", medianDifference))
}
## [1] "Participant 1 AWS Summary = "
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   150.0   182.5   210.0   250.7   290.0   450.0 
## [1] "Participant 1 Azure Summary = "
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   152.0   160.0   163.0   216.1   260.5   357.0 
## [1] "Difference participant 1 = 47"
## [1] "Participant 2 AWS Summary = "
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   120.0   133.0   195.0   227.6   306.0   400.0 
## [1] "Participant 2 Azure Summary = "
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   140.0   223.0   276.0   400.3   529.5   881.0 
## [1] "Difference participant 2 = -81"
## [1] "Participant 3 AWS Summary = "
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    85.0   171.0   216.0   388.9   524.5  1030.0 
## [1] "Participant 3 Azure Summary = "
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   200.0   221.0   300.0   458.7   496.5  1276.0 
## [1] "Difference participant 3 = -84"
## [1] "Participant 4 AWS Summary = "
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   108.0   130.0   220.0   206.9   230.0   400.0 
## [1] "Participant 4 Azure Summary = "
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   145.0   170.0   210.0   257.4   223.5   660.0 
## [1] "Difference participant 4 = 10"
## [1] "Participant 5 AWS Summary = "
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    70.0   103.0   109.0   112.9   127.5   150.0 
## [1] "Participant 5 Azure Summary = "
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   169.0   197.5   231.0   248.9   242.5   462.0 
## [1] "Difference participant 5 = -122"
## [1] "Participant 6 AWS Summary = "
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     175     189     290     425     514    1104 
## [1] "Participant 6 Azure Summary = "
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   219.0   235.5   326.0   382.0   415.5   827.0 
## [1] "Difference participant 6 = -36"
## [1] "Participant 7 AWS Summary = "
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    93.0   118.0   144.0   153.4   189.5   222.0 
## [1] "Participant 7 Azure Summary = "
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   106.0   130.5   203.0   299.7   377.0   774.0 
## [1] "Difference participant 7 = -59"
## [1] "Participant 8 AWS Summary = "
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    30.0    53.0    69.0   154.7   230.0   418.0 
## [1] "Participant 8 Azure Summary = "
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   143.0   162.0   265.0   328.3   323.0   920.0 
## [1] "Difference participant 8 = -196"
## [1] "Participant 9 AWS Summary = "
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   149.0   168.5   207.0   269.9   314.5   567.0 
## [1] "Participant 9 Azure Summary = "
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   113.0   180.0   281.0   283.6   333.5   564.0 
## [1] "Difference participant 9 = -74"
## [1] "Participant 10 AWS Summary = "
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    77.0    96.0   108.0   134.2   165.0   236.0 
## [1] "Participant 10 Azure Summary = "
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   127.0   194.5   234.0   365.6   386.0  1037.0 
## [1] "Difference participant 10 = -126"
## [1] "Participant 11 AWS Summary = "
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    55.0    70.0    90.0   108.4   136.5   201.0 
## [1] "Participant 11 Azure Summary = "
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    84.0   114.0   142.5   251.3   357.8   605.0 
## [1] "Difference participant 11 = -52.5"

Effect Size

# For every participant (1 to 11): Vargha-Delaney A between the AWS and Azure
# `seconds` values, printed as the complement (1 - estimate).
secondsOnly <- "{\"seconds\": 1, \"_id\": 0}"

for (participantId in seq_len(11)) {
  # Common prefix of both queries; only the platform name differs.
  participantFilter <- paste0("{\"participant\":", participantId, ", \"platform\": \"")

  awsRows   <- dataset$find(query = paste0(participantFilter, "AWS\"}"),   fields = secondsOnly)
  azureRows <- dataset$find(query = paste0(participantFilter, "Azure\"}"), fields = secondsOnly)

  vda <- VD.A(awsRows$seconds, azureRows$seconds)

  print(paste0("Participant ", participantId, " effect size = ", 1 - vda$estimate))
}
## [1] "Participant 1 effect size = 0.387755102040816"
## [1] "Participant 2 effect size = 0.755102040816326"
## [1] "Participant 3 effect size = 0.673469387755102"
## [1] "Participant 4 effect size = 0.540816326530612"
## [1] "Participant 5 effect size = 1"
## [1] "Participant 6 effect size = 0.561224489795918"
## [1] "Participant 7 effect size = 0.693877551020408"
## [1] "Participant 8 effect size = 0.775510204081633"
## [1] "Participant 9 effect size = 0.510204081632653"
## [1] "Participant 10 effect size = 0.857142857142857"
## [1] "Participant 11 effect size = 0.738095238095238"

4. Effectiveness

4.1. Platform

Plot

# Stacked bar chart of correct vs. incorrect answers per platform.
correctAWS <- dataset$find(
  query = "{\"platform\": \"AWS\"}",
  fields = "{\"correct\": 1, \"_id\": 0}"
)

correctAzure <- dataset$find(
  query = "{\"platform\": \"Azure\"}",
  fields = "{\"correct\": 1, \"platform\": 1, \"_id\": 0}"
)

# Counts are derived from the queried answers ("yes" / "no" in the `correct`
# field) instead of being typed in by hand, so the chart cannot drift away
# from the data set.
platforms <- c("AWS", "Azure")
correct <- c(
  sum(correctAWS$correct == "yes"),
  sum(correctAzure$correct == "yes")
)
incorrect <- c(
  sum(correctAWS$correct == "no"),
  sum(correctAzure$correct == "no")
)

data <- data.frame(platforms, correct, incorrect)

plot_ly(
  data,
  x = ~platforms,
  y = ~correct,
  type = "bar",
  name = "Correct"
) %>%
  add_trace(
    y = ~incorrect,
    name = "Incorrect"
  ) %>%
  layout(
    yaxis = list(title = "Count"),
    barmode = "stack"
  )

Descriptive Statistics

correct = dataset$find(fields = "{\"correct\": 1, \"platform\": 1, \"_id\": 0}")

table (correct)
##         correct
## platform no yes
##    AWS    6  70
##    Azure 13  63

Inferential Statistics & Effect Size

# Fetch the platform and the correctness flag of every observation.
correct = dataset$find(fields = "{\"correct\": 1, \"platform\": 1, \"_id\": 0}")

# Cross-tabulate platform against correctness and run Fisher's exact test,
# also listing the expected cell counts.
# NOTE(review): Pearson's chi-squared results are printed as well although
# chisq = FALSE is passed; presumably expected = TRUE switches the chi-squared
# test back on -- confirm against the gmodels documentation.
CrossTable(table(correct), fisher = TRUE, chisq = FALSE, expected = TRUE)
## 
##  
##    Cell Contents
## |-------------------------|
## |                       N |
## |              Expected N |
## | Chi-square contribution |
## |           N / Row Total |
## |           N / Col Total |
## |         N / Table Total |
## |-------------------------|
## 
##  
## Total Observations in Table:  152 
## 
##  
##              | correct 
##     platform |        no |       yes | Row Total | 
## -------------|-----------|-----------|-----------|
##          AWS |         6 |        70 |        76 | 
##              |     9.500 |    66.500 |           | 
##              |     1.289 |     0.184 |           | 
##              |     0.079 |     0.921 |     0.500 | 
##              |     0.316 |     0.526 |           | 
##              |     0.039 |     0.461 |           | 
## -------------|-----------|-----------|-----------|
##        Azure |        13 |        63 |        76 | 
##              |     9.500 |    66.500 |           | 
##              |     1.289 |     0.184 |           | 
##              |     0.171 |     0.829 |     0.500 | 
##              |     0.684 |     0.474 |           | 
##              |     0.086 |     0.414 |           | 
## -------------|-----------|-----------|-----------|
## Column Total |        19 |       133 |       152 | 
##              |     0.125 |     0.875 |           | 
## -------------|-----------|-----------|-----------|
## 
##  
## Statistics for All Table Factors
## 
## 
## Pearson's Chi-squared test 
## ------------------------------------------------------------
## Chi^2 =  2.947368     d.f. =  1     p =  0.08601752 
## 
## Pearson's Chi-squared test with Yates' continuity correction 
## ------------------------------------------------------------
## Chi^2 =  2.165414     d.f. =  1     p =  0.1411466 
## 
##  
## Fisher's Exact Test for Count Data
## ------------------------------------------------------------
## Sample estimate odds ratio:  0.4177346 
## 
## Alternative hypothesis: true odds ratio is not equal to 1
## p =  0.1396491 
## 95% confidence interval:  0.1225451 1.263983 
## 
## Alternative hypothesis: true odds ratio is less than 1
## p =  0.06982455 
## 95% confidence interval:  0 1.08485 
## 
## Alternative hypothesis: true odds ratio is greater than 1
## p =  0.97624 
## 95% confidence interval:  0.1481983 Inf 
## 
## 
## 

4.2. Task

Descriptive Statistics

correctAWS = dataset$find(query = "{\"platform\": \"AWS\"}", 
                          fields = "{\"correct\": 1, \"task\": 1, \"_id\": 0}")

correctAzure = dataset$find(query = "{\"platform\": \"Azure\"}", 
                            fields = "{\"correct\": 1, \"task\": 1, \"_id\": 0}")

print ("Correct AWS")
## [1] "Correct AWS"
table (correctAWS)
##     correct
## task no yes
##    1  0  11
##    2  1  10
##    3  0  11
##    4  2   8
##    5  1  10
##    6  0  11
##    7  2   9
print ("Correct Azure")
## [1] "Correct Azure"
table (correctAzure)
##     correct
## task no yes
##    1  1  10
##    2  1  10
##    3  2   8
##    4  1  10
##    5  3   8
##    6  3   8
##    7  2   9
cbind (table (correctAWS), table(correctAzure))
##   no yes no yes
## 1  0  11  1  10
## 2  1  10  1  10
## 3  0  11  2   8
## 4  2   8  1  10
## 5  1  10  3   8
## 6  0  11  3   8
## 7  2   9  2   9

4.3. Participant

Descriptive Statistics

correctAWS = dataset$find(query = "{\"platform\": \"AWS\"}", 
                          fields = "{\"correct\": 1, \"participant\": 1, \"_id\": 0}")

correctAzure = dataset$find(query = "{\"platform\": \"Azure\"}", 
                            fields = "{\"correct\": 1, \"participant\": 1, \"_id\": 0}")

print ("Correct AWS")
## [1] "Correct AWS"
table (correctAWS)
##            correct
## participant no yes
##          1   0   7
##          2   2   5
##          3   0   7
##          4   0   7
##          5   1   6
##          6   3   4
##          7   0   7
##          8   0   7
##          9   0   7
##          10  0   6
##          11  0   7
print ("Correct Azure")
## [1] "Correct Azure"
table (correctAzure)
##            correct
## participant no yes
##          1   0   7
##          2   6   1
##          3   2   5
##          4   0   7
##          5   2   5
##          6   1   6
##          7   0   7
##          8   0   7
##          9   0   7
##          10  2   5
##          11  0   6
cbind (table (correctAWS), table(correctAzure))
##    no yes no yes
## 1   0   7  0   7
## 2   2   5  6   1
## 3   0   7  2   5
## 4   0   7  0   7
## 5   1   6  2   5
## 6   3   4  1   6
## 7   0   7  0   7
## 8   0   7  0   7
## 9   0   7  0   7
## 10  0   6  2   5
## 11  0   7  0   6

5. Satisfaction

5.1. Which was the platform’s GUI that best benefits user interaction?

Plot

gui = feedback$find(fields = "{\"bestGUI\": 1, \"_id\": 0}")

table (gui)
## gui
##   AWS Azure 
##     7     4
# Pie chart of the answers about the best GUI; slices keep the table order
# and are labelled with the value and its percentage.
#
# The tabulated vector is named `gui` explicitly so that the label column of
# the frequency table is called `gui` (as `labels = ~gui` expects) regardless
# of how table() derives names from a one-column data frame. Without this the
# column may be named after the field (`bestGUI`), in which case `~gui` no
# longer refers to a column of the plotted data.
# NOTE(review): `colors` is not assigned anywhere in the visible code; unless
# it is defined elsewhere it resolves to the base function grDevices::colors
# instead of a palette -- confirm.
plot_ly(
  data = data.frame(table(gui = gui$bestGUI)),
  type = "pie",
  labels = ~gui,
  values = ~Freq,
  sort = FALSE,
  direction = "clockwise",
  textinfo = "label+percent",
  textposition = "inside",
  textfont = list(size = 14), # 28
  insidetextfont = list(color = "#FFFFFF"),
  marker = list(
    colors = colors,
    line = list(color = "#FFFFFF", width = 1)
  )
)

5.1.1. How does it relate to previous experience?

Descriptive Statistics

# Put each participant's GUI answer next to their previous VM experience.
#
# cbind() joins the two data frames row by row, so both queries must return
# the participants in the same order. The demographics documents are
# identified by `id` (they have no `participant` field), so sorting them by
# "participant" left them in storage order and attached the experience
# answers to the wrong participants. Both queries now sort by `id`.
gui <- feedback$find(
  fields = "{\"bestGUI\": 1, \"id\": 1, \"_id\": 0}",
  sort = "{\"id\": 1}"
)
createAWSandAZUREvm <- demographics$find(
  fields = "{\"createAZUREvm\": 1, \"createAWSvm\": 1, \"_id\": 0}",
  sort = "{\"id\": 1}"
)

cbind(gui, createAWSandAZUREvm)
##    id bestGUI createAWSvm createAZUREvm
## 1   1   Azure         Yes           Yes
## 2   2   Azure          No           Yes
## 3   3     AWS         Yes            No
## 4   4     AWS          No           Yes
## 5   5     AWS          No            No
## 6   6   Azure          No            No
## 7   7     AWS          No            No
## 8   8     AWS          No            No
## 9   9   Azure         Yes           Yes
## 10 10     AWS          No            No
## 11 11     AWS         Yes           Yes

5.2. Which was the easiest platform to perform the experiment tasks?

Plot

ease = feedback$find(fields = "{\"easiestPlatform\": 1, \"_id\": 0}")

print (ease)
##                              easiestPlatform
## 1                                        AWS
## 2                                        AWS
## 3  Could not observe significant differences
## 4                                        AWS
## 5                                        AWS
## 6                                        AWS
## 7                                        AWS
## 8                                        AWS
## 9                                        AWS
## 10                                     Azure
## 11                                       AWS
# Pie chart of the answers about the easiest platform; slices keep the table
# order and are labelled with the value and its percentage.
#
# The tabulated vector is named `ease` explicitly so that the label column of
# the frequency table is called `ease` (as `labels = ~ease` expects)
# regardless of how table() derives names from a one-column data frame.
# Without this the column may be named after the field (`easiestPlatform`),
# in which case `~ease` no longer refers to a column of the plotted data.
# NOTE(review): `colors` is not assigned anywhere in the visible code; unless
# it is defined elsewhere it resolves to the base function grDevices::colors
# instead of a palette -- confirm.
plot_ly(
  data = data.frame(table(ease = ease$easiestPlatform)),
  type = "pie",
  labels = ~ease,
  values = ~Freq,
  sort = FALSE,
  direction = "clockwise",
  textinfo = "label+percent",
  textposition = "inside",
  textfont = list(size = 14), # 28
  insidetextfont = list(color = "#FFFFFF"),
  marker = list(
    colors = colors,
    line = list(color = "#FFFFFF", width = 1)
  )
)

5.2.1. How does it relate to previous experience?

Descriptive Statistics

# Put each participant's "easiest platform" answer next to their previous VM
# experience.
#
# cbind() joins the two data frames row by row, so both queries must return
# the participants in the same order. The demographics documents are
# identified by `id` (they have no `participant` field), so sorting them by
# "participant" left them in storage order and attached the experience
# answers to the wrong participants. Both queries now sort by `id`.
ease <- feedback$find(
  fields = "{\"easiestPlatform\": 1, \"_id\": 0}",
  sort = "{\"id\": 1}"
)
createAWSandAZUREvm <- demographics$find(
  fields = "{\"createAZUREvm\": 1, \"createAWSvm\": 1, \"_id\": 0}",
  sort = "{\"id\": 1}"
)

cbind(ease, createAWSandAZUREvm)
##                              easiestPlatform createAWSvm createAZUREvm
## 1                                        AWS         Yes           Yes
## 2                                        AWS          No           Yes
## 3                                        AWS         Yes            No
## 4                                        AWS          No           Yes
## 5                                        AWS          No            No
## 6                                      Azure          No            No
## 7  Could not observe significant differences          No            No
## 8                                        AWS          No            No
## 9                                        AWS         Yes           Yes
## 10                                       AWS          No            No
## 11                                       AWS         Yes           Yes

5.3. Which platform would you prefer to use to create virtual machines?

Plot

preference = feedback$find(fields = "{\"favoritePlatform\": 1, \"_id\": 0}")

print (preference)
##    favoritePlatform
## 1               AWS
## 2               AWS
## 3               AWS
## 4               AWS
## 5               AWS
## 6               AWS
## 7               AWS
## 8             Azure
## 9               AWS
## 10            Azure
## 11              AWS
# Pie chart of the answers about the preferred platform; slices keep the
# table order and are labelled with the value and its percentage.
#
# The tabulated vector is named `preference` explicitly so that the label
# column of the frequency table is called `preference` (as
# `labels = ~preference` expects) regardless of how table() derives names
# from a one-column data frame. Without this the column may be named after
# the field (`favoritePlatform`), in which case `~preference` no longer
# refers to a column of the plotted data.
# NOTE(review): `colors` is not assigned anywhere in the visible code; unless
# it is defined elsewhere it resolves to the base function grDevices::colors
# instead of a palette -- confirm.
plot_ly(
  data = data.frame(table(preference = preference$favoritePlatform)),
  type = "pie",
  labels = ~preference,
  values = ~Freq,
  sort = FALSE,
  direction = "clockwise",
  textinfo = "label+percent",
  textposition = "inside",
  textfont = list(size = 14), # 28
  insidetextfont = list(color = "#FFFFFF"),
  marker = list(
    colors = colors,
    line = list(color = "#FFFFFF", width = 1)
  )
)

5.3.1. How does it relate to previous experience?

Descriptive Statistics

# Put each participant's preferred platform next to their previous VM
# experience.
#
# cbind() joins the two data frames row by row, so both queries must return
# the participants in the same order. The demographics documents are
# identified by `id` (they have no `participant` field), so sorting them by
# "participant" left them in storage order and attached the experience
# answers to the wrong participants. Both queries now sort by `id`.
preference <- feedback$find(
  fields = "{\"favoritePlatform\": 1, \"_id\": 0}",
  sort = "{\"id\": 1}"
)
createAWSandAZUREvm <- demographics$find(
  fields = "{\"createAZUREvm\": 1, \"createAWSvm\": 1, \"_id\": 0}",
  sort = "{\"id\": 1}"
)

cbind(preference, createAWSandAZUREvm)
##    favoritePlatform createAWSvm createAZUREvm
## 1               AWS         Yes           Yes
## 2               AWS          No           Yes
## 3               AWS         Yes            No
## 4               AWS          No           Yes
## 5               AWS          No            No
## 6             Azure          No            No
## 7               AWS          No            No
## 8               AWS          No            No
## 9             Azure         Yes           Yes
## 10              AWS          No            No
## 11              AWS         Yes           Yes